*===============================================================================
*Firm and Country Embedded Productivity (Alviarez, Cravino and Ramondo) 
*Constructiong bilateral shares at the level of the firm, sin(phi)
*Last Modified by Vanessa Alviarez 06/2020
*===============================================================================
do "TFP_firm_directory_historical.do"


*====================================================================
*Step 1: Data with common set of firms
*====================================================================
clear all
set more off

local var0 sales
local varset sales emp exports va labcost
*local types "naics naics3 naics4"
local types "naics"

etime, start
foreach type in `types' {

*local type="naics3"
display "`type'"

use year isocode id_bvd guo_bvd hq `varset' sector* NAICS* con_code using "${output}/firm_allyears_sales.dta", clear 
drop if `var0'==. | `var0'==0
tab con_code
drop if con_code=="$lf"
drop con_code
tab sector


if "`type'"=="naics4" {
drop if sector=="NA"
tab sector
local industry NAICS4 
}
if "`type'"=="naics3" {
drop if sector=="NA"
tab sector
local industry NAICS3 
}
if "`type'"=="naics" {
drop if sector=="NA"
tab sector
local industry sector 
}
if "`type'"=="market" {
keep if sector1=="Manufacturing (C)" | sector1=="Market_Services (G-H-I-J-K-M-N-R-S-T)" | sector1=="Other_Goods (A-B-D-E-F)" 
drop sector*
gen sector="Market (MARKT)"
gen sector1=sector
local industry sector 
}
if "`type'"=="MS" {
keep if sector1=="Manufacturing (C)" |  sector1=="Market_Services (G-H-I-J-K-M-N-R-S-T)" 
drop sector*
gen sector="MS"
gen sector1=sector
local industry sector 
}
if "`type'"=="manuf" {
keep if sector1=="Manufacturing (C)" 
drop sector*
gen sector="Manufacturing (C)"
gen sector1=sector
local industry sector 
}
if "`type'"=="serv" {
keep if sector1=="Market_Services (G-H-I-J-K-M-N-R-S-T)" 
drop sector*
gen sector="Market_Services (G-H-I-J-K-M-N-R-S-T)"
gen sector1=sector
local industry sector 
}
*

*Collapse at the level of the parent (this includes the sales of the GU-isocode at home as well as in other countries)
sort year isocode sector1 sector `industry' hq guo_bvd
collapse (sum) `varset', by(year isocode sector1 sector `industry' hq guo_bvd)

**I am computing MNC dummy (WITHIN A SECTOR) based on the number of countries  
*by year sector1 sector guo_bvd isocode, sort: gen a=_n==1
*tab a
*by year sector1 sector guo_bvd, sort: egen num_iso=total(a)
*drop a

*I am computing MNC dummy (across ALL SECTORS) based on the number of countries  
by year guo_bvd isocode, sort: gen a=_n==1
tab a
by year guo_bvd, sort: egen num_iso=total(a)
drop a

gen MNC=0
replace MNC=1 if num_iso>1
tab MNC
tab MNC if isocode!=hq
drop if MNC==0 & isocode==hq
tab MNC
drop if MNC==0 
*The above line keeps firms with only one affiliate but in a foreign country
*Rather not, you need to be in at least 2 countries (at the sector level) to be in the sample

**********************************************************
foreach vv of local varset {
display "`vv'"
rename `vv' sin_`vv'
}
**********************************************************
sort year hq isocode
tempfile p1
save `p1', replace

keep if sin_`var0'!=. & sin_`var0'!=0 
compress
tempfile p2
save `p2', replace


*-----------------------------------------
*Bring production Klems-OECD data --- Xn, Xnn, Xn-exp (this files were prepared in 'aggregate_data_sales.do' files ----
*-----------------------------------------
*Variables: GO_usd GO_usd_exp sales Xnn Xnn_orbis Xnn2
*GO_usd (sales in MM USD) ---- yn/yi ----
*sales (sales in MM USD -- as measured in Orbis)  ---- yn/yi ----
*GO_usd_exp (revenue - exports MM USD)  ---- yn_exp/yi_exp -----
*Xnn (revenue by local producers, this is excluding the sales of foreign affiliates) ----- ynn/yii -----
*Xnn2 (Xnn but replaces with Xnn_orbis whenever itcontains missing values) ----- ynn/yii -------
*klems_oecd_unido_orbis_sales_emp_exp_0514
merge m:1 year sector isocode using "${output}/klems_oecd_unido_orbis_sales_emp_exp.dta", keepusing(EMP* GO_usd GO_usd_exp VA_usd exports LAB_usd data_source) 
keep if year>=2005 & year<=2017
drop if _merge==2 
drop _merge

***I have to change this***
gen Xnn2=GO_usd
***I have to change***

rename VA_usd yn_va_ko
rename EMPE yn_emp_ko
rename exports yn_exp_ko
rename GO_usd yn_`var0'_ko
rename LAB_usd yn_labcost_ko
rename Xnn2 ynn_`var0'_ko
rename GO_usd_exp yn_`var0'_exp_ko
format %9.0fc yn_`var0'_ko ynn_`var0'_ko yn_`var0'_exp_ko yn_exp_ko yn_va_ko yn_labcost_ko

tempfile t0
save `t0', replace

*-----------------------------------
use `t0', clear
keep year isocode sector1 sector `industry' yn_`var0'_ko yn_emp_ko  yn_va_ko ynn_`var0'_ko yn_`var0'_exp_ko  yn_exp_ko yn_labcost_ko
duplicates drop 
collapse (sum) yn_`var0'_ko yn_emp_ko yn_va_ko ynn_`var0'_ko yn_`var0'_exp_ko  yn_exp_ko yn_labcost_ko, by(year isocode sector1 sector `industry')
tempfile t1
save `t1', replace
*--------------------------------------

use `t0', clear
collapse (sum) sin_sales sin_emp sin_va sin_exp sin_labcost, by(year guo_bvd isocode hq  sector1 sector `industry')
merge m:1 year isocode sector1 sector `industry' using `t1'
drop _merge

foreach i in sin_sales sin_emp sin_va sin_exp sin_labcost yn_`var0'_ko yn_emp_ko ynn_`var0'_ko yn_`var0'_exp_ko  yn_exp_ko yn_labcost_ko {
replace `i'=. if `i'==0
}
tempfile final
save `final', replace


*--------------------------------------------------
*Construct the relevant shares 
*--------------------------------------------------
use `final', clear

display "Drop firms below 100,000 USD" 
sum sin_`var0', d
sum sin_`var0' if sin_`var0'<0.1 
drop if sin_`var0'<0.1 


gen sin_emp_ko=sin_emp/yn_emp_ko
gen sin_va_ko=sin_va/yn_va_ko
gen sin_labcost_ko=sin_labcost/yn_labcost_ko
gen sin_`var0'_ko=sin_`var0'/yn_`var0'_ko
gen sin_`var0'_local_ko=sin_`var0'/ynn_`var0'_ko
gen sin_`var0'_exp_ko=(sin_`var0'-sin_exp)/yn_`var0'_exp_ko


*HK, SG and JP are the isocodes barely affected by this (I have to take a look to the aggregates for JP in 2017) 
drop if sin_sales_ko>=1 | sin_sales_ko<=0 | sin_sales_ko==.
replace sin_emp_ko=. if sin_emp_ko>=1 | sin_emp_ko<=0
replace sin_va_ko=. if sin_va_ko>=1 | sin_va_ko<=0
replace sin_labcost_ko=. if sin_labcost_ko>=1 | sin_labcost_ko<=0
replace sin_`var0'_exp_ko=. if sin_`var0'_exp_ko>=1 | sin_`var0'_exp_ko<=0


*--------------------------------------------------
*Create lags (growth excercise)
*--------------------------------------------------
*local y=2017
*local gap=7
*local y0=`y'-`gap'
*display "`y0'"
egen id = group(isocode sector1 sector `industry' guo_bvd) 
sort id year
xtset id year 
foreach vv in sales emp {
foreach tt in 1 2 3 5 6 7 8 9 10 11 {
display "`vv'; `tt'"
by id, sort: gen l`tt'_sin_`vv'_ko = l`tt'.sin_`vv'_ko
label var l`tt'_sin_`vv'_ko "lag `tt' of sin_`vv'_ko"
}
}
drop id
xtset, clear



*Keep only MNC (two countries, which includes two or more affiliates) and IN THE SECTOR 
*----------------------------------------------
by year sector guo_bvd isocode, sort: gen a=_n==1
by year sector guo_bvd, sort: egen b=total(a)
tab b 
keep if b>=2
drop a b 
format %9.4fc sin* 
format %9.0fc *`var0' yn* 
sort sector1 sector `industry' hq isocode
order sector1 sector `industry' hq isocode sin_`var0'_ko sin_`var0'_local_ko sin_`var0'_exp_ko 


*--------------------------------------------------
*Create percentiles 
*--------------------------------------------------
foreach pos in 10 20 30 40 50 60 70 80 90 {
display `pos'
gsort year isocode sector1 sector `industry' -sin_sales
by year isocode sector1 sector `industry': egen p`pos'_sales = pctile(sin_sales), p(`pos')
format %9.3fc p`pos'_sales
label var p`pos'_sales "pctile `pos' of sin_sales"
}
*

*--------------------------------------------------
*Number of countries in which the affiliate operates (outside home)
*--------------------------------------------------
sort year sector1 `industry' hq guo_bvd isocode
by year sector1 `industry' hq guo_bvd isocode, sort: gen a=_n==1
replace a=0 if isocode==hq
by year sector1 `industry' hq guo_bvd, sort: egen num_ctry=total(a)
drop a
label var num_ctry "number of foreign countries GUO operates within `type'"

tempfile LCS
save `LCS', replace


*Firms in the Largest Connected Set (all checked, naics, naics3 and naics4, are all connected)
*----------------------------------------------
use `LCS', clear
keep year `industry' guo_bvd isocode iso sin_`var0' sin_`var0'_ko yn_`var0'_ko
duplicates drop
sort year isocode guo_bvd
egen iso=group(isocode)
egen guo=group(guo_bvd)
egen sss=group(`industry')
order year `industry' guo_bvd isocode sss guo iso sin_`var0' sin_`var0'_ko yn_`var0'_ko
sort year sss iso guo 
keep if isocode!=""
keep if isocode=="JP" | isocode=="IT" | isocode=="KR" | isocode=="DE" | isocode=="FR" | isocode=="ES" |  isocode=="MX" |  isocode=="GB" |  isocode=="PL" |  isocode=="GR" |  isocode=="NL" |  isocode=="RO" |  isocode=="BE" |  isocode=="AT" |  isocode=="PT" |  isocode=="FI" |  isocode=="SE" |  isocode=="CZ" |  isocode=="SK" |  isocode=="BG" |  isocode=="HU" |  isocode=="HR" |  isocode=="LV" |  isocode=="SI" |  isocode=="LT" |  isocode=="EE" 
save "${output}/LCS_`type'.dta", replace 
keep year sss iso guo sin_`var0' sin_`var0'_ko yn_`var0'_ko
sort year sss iso guo 
order year sss guo iso sin_`var0' sin_`var0'_ko yn_`var0'_ko 
export delimited using "${output}/LCS_`type'", novarnames replace


*labeling variables of interest 
*------------------------
use `LCS', clear
label var sin_emp_ko "sin_emp/yn_emp (den KLEMS/OECD)"
label var sin_va_ko "sin_va/yn_va (den KLEMS/OECD)"
label var sin_`var0'_ko "sin_`var0'/yn (den KLEMS/OECD)"
label var sin_`var0'_local_ko "sin_`var0'/ynn (den KLEMS/OECD)"
label var sin_`var0'_exp_ko "(sin_`var0'-sin_exp)/yn_`var0'_exp_ko (den KLEMS/OECD/WIOT)"
label var sin_`var0' "`var0' of i MNCs operating in n (cond operating in i)"
label var sin_emp "emp of i MNCs operating in n (cond operating in i)"
label var sin_exp "exports of i MNCs operating in n (cond operating in i)"
label var sin_va "value added of i MNCs operating in n (cond operating in i)"
label var sin_labcost "Labor cost of i MNCs operating in n (cond operating in i)"

label var yn_`var0'_ko "total `var0' in country n"
label var yn_emp_ko "total emp in country n"
label var yn_va_ko "total va in country n"
label var yn_labcost_ko "total labor cost in country n"
label var yn_exp_ko "total exports in country n"
label var ynn_`var0'_ko "total `var0' of local firms in country n"
label var yn_`var0'_exp_ko "total `var0' (minus exports) in country n"
compress

tempfile tt_b0
save `tt_b0', replace

*Saving the file
*------------------------
drop if year==.
order year sector1 sector `industry' hq isocode sin_`var0'_ko sin_`var0'_local_ko sin_`var0'_exp_ko 
compress
tempfile sin_firmlevel_`type'
save `sin_firmlevel_`type'', replace
}
etime 



*===============================================================================
*Computing the Largest Connected Set (all sector are connected in all years!)
*===============================================================================
/*
local types "naics naics3 naics4"

foreach type in `types' {
clear all
import delimited "${output}/LCS_`type'_fromMatlab.csv"
rename v1 year
rename v2 sss
rename v3 guo
rename v4 iso
duplicates drop 
tempfile LCS_matlab
save `LCS_matlab', replace

use "${output}/LCS_`type'.dta", clear
merge 1:1 year sss guo iso using `LCS_matlab'
tab _merge
}
*
*/

*===============================================================================
*Step 2(b): Drop outlier observations 
*===============================================================================
display "Drop foreing affiliates below 1MM USD and Parents below 5MM USD"
clear all
local whattype naics
use `sin_firmlevel_`whattype'', clear

sum sin_`var0', d
sum sin_`var0' if sin_`var0'<1 
tab isocode if sin_`var0'>=1
display "Drop firms below 1 MILLION USD" 
*drop if sin_`var0'<1 

duplicates drop 
compress 
tempfile sample_firms
save `sample_firms', replace




*===============================================================================
*Temporary check on gravity variables
*===============================================================================
use "${inputs}/gravdata_00_16.dta", clear
keep if year==2016
replace year=2017
tempfile temp
save `temp', replace

use "${inputs}/gravdata_00_16.dta", clear
append using `temp'
tempfile gravdata_00_17
save `gravdata_00_17', replace



*===============================================================================
*Step 2: Additions: include gravity variables and a dummy of constant MNC across years  
*===============================================================================
clear all
set more off

etime, start 
foreach type in `types' {

*local type naics  
display "`type'"

if "`type'"=="naics4" {
local industry NAICS4 
}
if "`type'"=="naics3" {
local industry NAICS3 
}
else {
local industry sector
}

*Bring gravity variables 
*-----------------------------------------------
use `sin_firmlevel_`type'', clear


*Drop the outliers
*----------------------------------------------- 
merge m:1 year hq isocode sector guo_bvd using `sample_firms', keepusing(year hq isocode guo_bvd)
keep if _merge==3
drop _merge
*-----------------------------------------------


gen iso2_o=hq 
gen iso2_d=isocode
local trade_var distw fta_wto contig comlang_off colony  pop_o pop_d gdp_o gdp_d gdpcap_o gdpcap_d   
merge m:1 iso2_o iso2_d year using `gravdata_00_17', keepusing(`trade_var')

drop if _merge==2
drop _merge
foreach var in iso2_o iso2_d {
encode `var', gen(`var'b)
drop `var'
rename `var'b `var'
}
*
gen distance=ln(distw)
drop distw
drop iso2_o iso2_d
rename fta_wto RTA
label var distance "ln distance" 
label var RTA "Regional Trade Aggrement" 
label var comlang_off "Common Language" 
label var contig "commnon Border" 
label var colony "Former Colony" 
compress


*Flag MNCs number of years it has information (out of yy years)
*-----------------------------------------------
by sector1 sector guo_bvd isocode year, sort: gen a=_n==1
replace a=0 if year<2010
by sector1 sector guo_bvd isocode, sort: egen years_after2010=total(a)
drop a
by sector1 sector guo_bvd isocode year, sort: gen a=_n==1
by sector1 sector guo_bvd isocode, sort: egen years_allperiod=total(a)
drop a
gen sample_const=1
compress
save  "${output}/sin_firmlevel_`type'_gravity$lf.dta", replace
}
etime



******************************************************************************
clear all
local type naics 
use "${output}/sin_firmlevel_`type'_gravity$lf.dta", clear 
*use `sin_firmlevel_`type'', clear
******************************************************************************


